Data Preparation¶

In [1]:
# import necassary libraries
import pandas as pd

#load datasets
fuel_data = pd.read_csv('fuel_prices.csv', delimiter=';')
transport_data = pd.read_csv('road_transport.csv', delimiter=';')
In [2]:
transport_data.head()
Out[2]:
Goederenstromen Perioden 1000 kg
0 Inkomend transport; totaal 2007* 103838966
1 Inkomend transport; totaal 2008* 103405705
2 Inkomend transport; totaal 2009* 89433416
3 Inkomend transport; totaal 2010* 89935769
4 Inkomend transport; totaal 2011* 91253760
In [3]:
fuel_data.head()
Out[3]:
Perioden Benzine Diesel Lpg
0 2006 zondag 1 januari 1,325 1,003 0,543
1 2006 maandag 2 januari 1,328 1,007 0,542
2 2006 dinsdag 3 januari 1,332 1,007 0,54
3 2006 woensdag 4 januari 1,348 1,02 0,55
4 2006 donderdag 5 januari 1,347 1,021 0,55

Prepare transport data¶

In [4]:
#only run this cell one time

#remove asterix from tansport data
transport_data['Perioden'] = transport_data['Perioden'].str.replace('*', '', regex=False)

#set perioden to datetime format year
transport_data['Perioden'] = pd.to_datetime(transport_data['Perioden'])
transport_data['Perioden'] = transport_data['Perioden'].dt.year

transport_data.head()
%store transport_data
Stored 'transport_data' (DataFrame)

Prepare tranpsort data for sub questions¶

In [5]:
#create dataframe for incomming transport
#filtering the data so only the value "inkomend transport; totaal" is given
transport_data_incoming = transport_data[(transport_data['Goederenstromen'] == 'Inkomend transport; totaal')].copy()
transport_data_incoming.reset_index(drop=True)

#Add percentage differences column with respect to the first year in the dataset (2007)
transport_data_incoming['Transport Inkomend Diff (%)'] = ((transport_data_incoming['1000 kg'] 
                                                           - transport_data_incoming['1000 kg'].iloc[0]) 
                                                          / transport_data_incoming['1000 kg'].iloc[0]) * 100

transport_data_incoming.head()
%store transport_data_incoming
Stored 'transport_data_incoming' (DataFrame)
In [6]:
#create dataframe for inbound throughgoing transport
transport_data_inthrough = transport_data[(transport_data['Goederenstromen'] == 'Inkomende doorvoer; totaal')].copy()

#Add percentage differences column with respect to the first year in the dataset (2007)
transport_data_inthrough['Transport inbound throughgoing Diff (%)'] = ((transport_data_inthrough['1000 kg'] 
                                                           - transport_data_inthrough['1000 kg'].iloc[0]) 
                                                          / transport_data_inthrough['1000 kg'].iloc[0]) * 100

transport_data_inthrough.head()
%store transport_data_inthrough
Stored 'transport_data_inthrough' (DataFrame)
In [7]:
#create dataframe for outbound throughgoing tranport
transport_data_outthrough = transport_data[(transport_data['Goederenstromen'] == 'Uitgaande doorvoer; totaal')].copy()

#Add percentage differences column with respect to the first year in the dataset (2007)
transport_data_outthrough['Transport outbound throughgoing Diff (%)'] = ((transport_data_outthrough['1000 kg'] 
                                                           - transport_data_outthrough['1000 kg'].iloc[0]) 
                                                          / transport_data_outthrough['1000 kg'].iloc[0]) * 100

transport_data_outthrough.head()
%store transport_data_outthrough
Stored 'transport_data_outthrough' (DataFrame)
In [8]:
#create dataframe for outgoing tranport
transport_data_outgoing = transport_data[(transport_data['Goederenstromen'] == 'Uitgaand transport; totaal')].copy()

#Add percentage differences column with respect to the first year in the dataset (2007)
transport_data_outgoing['Transport outgoing Diff (%)'] = ((transport_data_outgoing['1000 kg'] 
                                                           - transport_data_outgoing['1000 kg'].iloc[0]) 
                                                          / transport_data_outgoing['1000 kg'].iloc[0]) * 100

transport_data_outgoing.head()
%store transport_data_outgoing
Stored 'transport_data_outgoing' (DataFrame)

Prepare fuel data¶

In [9]:
#only run this cell one time
from datetime import datetime

# Custom function to convert a date string to a Pandas datetime
def parse_date(date_str):
    for i, month_name in enumerate(dutch_months):
        date_str = date_str.replace(month_name, datetime(2000, i+1, 1).strftime('%B'))

    for i, day_name in enumerate(dutch_days):
        date_str = date_str.replace(day_name, datetime(2000, 1, i+1).strftime('%A'))

    date_format = '%Y %A %d %B'
    return pd.to_datetime(date_str, format=date_format)

# Define Dutch months and day names
dutch_months = ['januari', 'februari', 'maart', 'april', 'mei', 'juni', 'juli', 'augustus', 'september', 'oktober', 'november', 'december']
dutch_days = ['zondag', 'maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag']

# Apply the custom function to the "Perioden" column
fuel_data['Perioden'] = fuel_data['Perioden'].apply(parse_date)

#set perioden to datetime format year
fuel_data['Perioden'] = fuel_data['Perioden'].dt.year

#replace , to . and set to float
fuel_data['Benzine'] = fuel_data['Benzine'].str.replace(',', '.', regex=False)
fuel_data['Benzine'] = fuel_data['Benzine'].astype(float)
fuel_data['Diesel'] = fuel_data['Diesel'].str.replace(',', '.', regex=False)
fuel_data['Diesel'] = fuel_data['Diesel'].astype(float)
fuel_data['Lpg'] = fuel_data['Lpg'].str.replace(',', '.', regex=False)
fuel_data['Lpg'] = fuel_data['Lpg'].astype(float)

# Group by year and calculate the mean for all columns
fuel_data = fuel_data.groupby('Perioden').mean().reset_index()

# Remove years that are not in the transport dataset
years_to_remove = [2006, 2022, 2023]

# Remove rows for the specified years
fuel_data = fuel_data[~fuel_data['Perioden'].isin(years_to_remove)]

# Display the DataFrame with the converted dates
fuel_data.head()
%store fuel_data
Stored 'fuel_data' (DataFrame)

Prepare fuel data for subquestions¶

In [10]:
#Add percentage differences column between fuel prices with respect to the first year in the dataset (2007)
fuel_data['Benzine Price Diff (%)'] = ((fuel_data['Benzine'] - fuel_data['Benzine'].iloc[0]) / 
                                       fuel_data['Benzine'].iloc[0]) * 100

fuel_data['Diesel Price Diff (%)'] = ((fuel_data['Diesel'] - fuel_data['Diesel'].iloc[0]) / 
                                      fuel_data['Diesel'].iloc[0]) * 100

fuel_data['Lpg Price Diff (%)'] = ((fuel_data['Lpg'] - fuel_data['Lpg'].iloc[0]) / 
                                   fuel_data['Lpg'].iloc[0]) * 100

fuel_data.head()
%store fuel_data
Stored 'fuel_data' (DataFrame)

Data analysis¶

In [11]:
#import libraries
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

How do fuel prices influence incoming transport of goods in the Netherlands?¶

In [12]:
# show price difference and transport difference in percentage in one plot

# Create a plotly figure
Figure = px.line()

# Add transport incoming difference % dataset
Figure.add_scatter(x=transport_data_incoming['Perioden'],
                   y=transport_data_incoming['Transport Inkomend Diff (%)'],
                   mode='lines+text',
                   name='Transport Inkomend Diff (%)',
                   text=transport_data_incoming['Transport Inkomend Diff (%)'].round(1).astype(str) + '%',
                   textposition="top center")

# Add the fuel prices, for three different columns benzine, diesel and lpg
for i in ['Benzine Price Diff (%)', 'Diesel Price Diff (%)', 'Lpg Price Diff (%)']:
    Figure.add_scatter(x=fuel_data['Perioden'],
                       y=fuel_data[i],
                       mode='lines+text',
                       name=i,
                       text=fuel_data[i].round(1).astype(str) + '%',
                       textposition="top center")

# Update the layout of the figure, add a title, x-axis title, and y-axis title
Figure.update_layout(title='Percentage differences over time with respect to the first year',
                     xaxis_title='Year',
                     yaxis_title='Percentage Difference',
                     xaxis_range=[2007, 2022],
                     yaxis_range=[-50, 50],
                     height=500,
                     width=1000)

# Show the figure
Figure.show()
%store Figure
Stored 'Figure' (Figure)
In [13]:
#plot fuel prices and incoming transport in 1 figure
#Make sure the graph can have two y-axis
Figure2 = make_subplots(specs=[[{'secondary_y': True}]])


# Add the incoming transport
Figure2.add_trace(
    go.Scatter(x=transport_data_incoming['Perioden'],
               y=transport_data_incoming['1000 kg'],
               mode='lines',
               name='Incoming transport'), secondary_y=False)

# add the fuel prices
for i in ['Benzine', 'Diesel', 'Lpg']:
    Figure2.add_trace(
        go.Scatter(x=fuel_data['Perioden'],
                   y=fuel_data[i],
                   mode='lines',
                   name=i), secondary_y=True)

# Update the layout with axis labels
Figure2.update_layout(
    title='Fuel price vs Incoming transport',
    xaxis_title='Perioden',
    yaxis_title='Transport in Ton',
    yaxis2_title='Fuel Prices')

# Show the plot
Figure2.show()
%store Figure2
Stored 'Figure2' (Figure)

How do fuel prices influence throughgoing transport of goods in the Netherlands?¶

Inbound throughgoing¶

In [14]:
# show price difference and transport difference in percentage in one plot

# Create a plotly figure
Figure3 = px.line()

# Add transport inbound throughgoing difference % dataset
Figure3.add_scatter(x=transport_data_inthrough['Perioden'],
                   y=transport_data_inthrough['Transport inbound throughgoing Diff (%)'],
                   mode='lines+text',
                   name='Transport inbound throughgoing (%)',
                   text=transport_data_inthrough['Transport inbound throughgoing Diff (%)'].round(1).astype(str) + '%',
                   textposition="top center")

# Add the fuel prices, for three different columns benzine, diesel and lpg
for i in ['Benzine Price Diff (%)', 'Diesel Price Diff (%)', 'Lpg Price Diff (%)']:
    Figure3.add_scatter(x=fuel_data['Perioden'],
                       y=fuel_data[i],
                       mode='lines+text',
                       name=i,
                       text=fuel_data[i].round(1).astype(str) + '%',
                       textposition="top center")

# Update the layout of the figure, add a title, x-axis title, and y-axis title
Figure3.update_layout(title='Percentage differences over time with respect to the first year',
                     xaxis_title='Year',
                     yaxis_title='Percentage Difference',
                     xaxis_range=[2007, 2022],
                     yaxis_range=[-50, 100],
                     height=500,
                     width=1000)

# Show the figure
Figure3.show()
%store Figure3
Stored 'Figure3' (Figure)
In [15]:
#plot fuel prices and incoming transport in 1 figure
#Make sure the graph can have two y-axis
Figure4 = make_subplots(specs=[[{'secondary_y': True}]])


# Add the inbound throughgoing transport
Figure4.add_trace(
    go.Scatter(x=transport_data_inthrough['Perioden'],
               y=transport_data_inthrough['1000 kg'],
               mode='lines',
               name='Inbound throughgoing transport'), secondary_y=False)

# add the fuel prices
for i in ['Benzine', 'Diesel', 'Lpg']:
    Figure4.add_trace(
        go.Scatter(x=fuel_data['Perioden'],
                   y=fuel_data[i],
                   mode='lines',
                   name=i), secondary_y=True)

# Update the layout with axis labels
Figure4.update_layout(
    title='Fuel price vs Inbound throughgoing transport',
    xaxis_title='Perioden',
    yaxis_title='Transport in Ton',
    yaxis2_title='Fuel Prices')

# Show the plot
Figure4.show()
%store Figure4
Stored 'Figure4' (Figure)

Outbound throughgoing¶

In [16]:
# show price difference and transport difference in percentage in one plot

# Create a plotly figure
Figure5 = px.line()

# Add transport outbound throughgoing difference % dataset
Figure5.add_scatter(x=transport_data_outthrough['Perioden'],
                   y=transport_data_outthrough['Transport outbound throughgoing Diff (%)'],
                   mode='lines+text',
                   name='Transport outbound throughgoing',
                   text=transport_data_outthrough['Transport outbound throughgoing Diff (%)'].round(1).astype(str) + '%',
                   textposition='top center')

# Add the fuel prices, for three different columns benzine, diesel and lpg
for i in ['Benzine Price Diff (%)', 'Diesel Price Diff (%)', 'Lpg Price Diff (%)']:
    Figure5.add_scatter(x=fuel_data['Perioden'],
                       y=fuel_data[i],
                       mode='lines+text',
                       name=i,
                       text=fuel_data[i].round(1).astype(str) + '%',
                       textposition="top center")

# Update the layout of the figure, add a title, x-axis title, and y-axis title
Figure5.update_layout(title='Percentage differences over time with respect to the first year',
                     xaxis_title='Year',
                     yaxis_title='Percentage Difference',
                     xaxis_range=[2007, 2022],
                     yaxis_range=[-50, 50],
                     height=500,
                     width=1000)

# Show the figure
Figure5.show()
%store Figure5
Stored 'Figure5' (Figure)
In [17]:
#plot fuel prices and incoming transport in 1 figure
#Make sure the graph can have two y-axis
Figure6 = make_subplots(specs=[[{'secondary_y': True}]])


# Add the outbound throughgoing transport
Figure6.add_trace(
    go.Scatter(x=transport_data_outthrough['Perioden'],
               y=transport_data_outthrough['1000 kg'],
               mode='lines',
               name='Outbound throughgoing transport'), secondary_y=False)

# add the fuel prices
for i in ['Benzine', 'Diesel', 'Lpg']:
    Figure6.add_trace(
        go.Scatter(x=fuel_data['Perioden'],
                   y=fuel_data[i],
                   mode='lines',
                   name=i), secondary_y=True)

# Update the layout with axis labels
Figure6.update_layout(
    title='Fuel price vs Outbound throughgoing transport',
    xaxis_title='Perioden',
    yaxis_title='Transport in Ton',
    yaxis2_title='Fuel Prices')

# Show the plot
Figure6.show()
%store Figure6
Stored 'Figure6' (Figure)

How do fuel prices influence outgoing transport of goods in the Netherlands?¶

In [18]:
# show price difference and transport difference in percentage in one plot

# Create a plotly figure
Figure7 = px.line()

# Add transport outgoing difference % dataset
Figure7.add_scatter(x=transport_data_outgoing['Perioden'],
                   y=transport_data_outgoing['Transport outgoing Diff (%)'],
                   mode='lines+text',
                   name='Transport outgoing',
                   text=transport_data_outgoing['Transport outgoing Diff (%)'].round(1).astype(str) + '%',
                   textposition='top center')

# Add the fuel prices, for three different columns benzine, diesel and lpg
for i in ['Benzine Price Diff (%)', 'Diesel Price Diff (%)', 'Lpg Price Diff (%)']:
    Figure7.add_scatter(x=fuel_data['Perioden'],
                       y=fuel_data[i],
                       mode='lines+text',
                       name=i,
                       text=fuel_data[i].round(1).astype(str) + '%',
                       textposition="top center")

# Update the layout of the figure, add a title, x-axis title, and y-axis title
Figure7.update_layout(title='Percentage differences over time with respect to the first year',
                     xaxis_title='Year',
                     yaxis_title='Percentage Difference',
                     xaxis_range=[2007, 2022],
                     yaxis_range=[-50, 50],
                     height=500,
                     width=1000)

# Show the figure
Figure7.show()
%store Figure7
Stored 'Figure7' (Figure)
In [19]:
#plot fuel prices and incoming transport in 1 figure
#Make sure the graph can have two y-axis
Figure8 = make_subplots(specs=[[{'secondary_y': True}]])


# Add the outgoing transport
Figure8.add_trace(
    go.Scatter(x=transport_data_outgoing['Perioden'],
               y=transport_data_outgoing['1000 kg'],
               mode='lines',
               name='Outgoing transport'), secondary_y=False)

# add the fuel prices
for i in ['Benzine', 'Diesel', 'Lpg']:
    Figure8.add_trace(
        go.Scatter(x=fuel_data['Perioden'],
                   y=fuel_data[i],
                   mode='lines',
                   name=i), secondary_y=True)

# Update the layout with axis labels
Figure8.update_layout(
    title='Fuel price vs Outgoing transport',
    xaxis_title='Perioden',
    yaxis_title='Transport in Ton',
    yaxis2_title='Fuel Prices')

# Show the plot
Figure8.show()
%store Figure8
Stored 'Figure8' (Figure)
In [ ]:
 
In [ ]: